import torch
import torch.nn as nn
from torchinfo import summary
import numpy as np
July 30, 2022
This blog assumes the reader has some understanding of MobileNet. It is just a rough illustration of how much the different MobileNet blocks save; the actual papers have the real numbers. If you want to know more about the models, please go through the papers MobileNetV1 and MobileNetV2. Here we will briefly see how many parameters and floating point operations are required by a normal convolution block, a MobileNetV1 block and a MobileNetV2 block for the same input size to produce the same output. We will use the torchinfo library to get the summaries.
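The snippets below also rely on a small setup cell that is not shown here. A minimal sketch of it, inferred from the shapes printed further down (batch size 3, 64 input channels, 128 output channels, 224x224 feature maps), could look like this; printInputAndOutput is a tiny helper assumed here for printing a block's input and output shapes.

# Setup assumed by the snippets below (not shown in the original post):
# batch of 3, 64 input channels, 128 output channels, 224x224 feature maps.
input_filters = 64
output_filters = 128
input_size = (3, input_filters, 224, 224)  # (batch, channels, height, width)

def printInputAndOutput(block):
    # run a dummy tensor through the block and print both shapes
    x = torch.randn(input_size)
    with torch.no_grad():
        out = block(x)
    print(f"Input shape = {x.shape}")
    print(f"Output shape = {out.shape}")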
simple_convBlock = nn.Sequential(
    nn.Conv2d(in_channels=input_filters, out_channels=output_filters, kernel_size=3,
              stride=2, padding=1, bias=False),
    nn.BatchNorm2d(output_filters),
    nn.ReLU(inplace=True))
printInputAndOutput(simple_convBlock)
Input shape = torch.Size([3, 64, 224, 224])
Output shape = torch.Size([3, 128, 112, 112])
summary(simple_convBlock,input_size=input_size,col_names=["kernel_size", "output_size", "num_params", "mult_adds"])
============================================================================================================================================
Layer (type:depth-idx) Kernel Shape Output Shape Param # Mult-Adds
============================================================================================================================================
Sequential -- [3, 128, 112, 112] -- --
├─Conv2d: 1-1 [3, 3] [3, 128, 112, 112] 73,728 2,774,532,096
├─BatchNorm2d: 1-2 -- [3, 128, 112, 112] 256 768
├─ReLU: 1-3 -- [3, 128, 112, 112] -- --
============================================================================================================================================
Total params: 73,984
Trainable params: 73,984
Non-trainable params: 0
Total mult-adds (G): 2.77
============================================================================================================================================
Input size (MB): 38.54
Forward/backward pass size (MB): 77.07
Params size (MB): 0.30
Estimated Total Size (MB): 115.90
============================================================================================================================================
The main idea in MobileNetV1 is to use a depthwise separable convolution (a depthwise convolution followed by a pointwise 1x1 convolution) to reduce the number of parameters and floating point operations required. For more info please read the paper or watch this tutorial by Prof Maziar Raissi.
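Before looking at the block itself, a quick back-of-the-envelope count (a sketch, assuming the shapes used here: 64 input channels, 128 output channels, a 3x3 kernel, stride 2, batch size 3 and 112x112 output maps) shows where the savings come from; the numbers line up with the torchinfo summaries above and below.

c_in, c_out, k = 64, 128, 3
h_out, w_out, batch = 112, 112, 3

# standard 3x3 convolution
std_params = k * k * c_in * c_out                                 # 73,728
std_mult_adds = std_params * h_out * w_out * batch                # 2,774,532,096 (~2.77 G)

# depthwise separable convolution: 3x3 depthwise + 1x1 pointwise
dw_params = k * k * c_in                                          # 576
pw_params = c_in * c_out                                          # 8,192
sep_mult_adds = (dw_params + pw_params) * h_out * w_out * batch   # ~330 M

print(std_params, std_mult_adds)
print(dw_params + pw_params, sep_mult_adds)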
mobileNetBlock = nn.Sequential(
#DEPTHWISE CONV
#a depthwise convolution is obtained by setting groups equal to in_channels
nn.Conv2d(in_channels=input_filters,out_channels=input_filters,kernel_size=3,
stride=2,padding=1,groups=input_filters,bias=False),
nn.BatchNorm2d(input_filters),
nn.ReLU(inplace=True),
#POINTWISE CONV
nn.Conv2d(in_channels=input_filters,out_channels=output_filters,kernel_size=1,
stride=1,padding=0,bias=False),
nn.BatchNorm2d(output_filters),
nn.ReLU(inplace=True)
)
printInputAndOutput(mobileNetBlock)
Input shape = torch.Size([3, 64, 224, 224])
Output shape = torch.Size([3, 128, 112, 112])
summary(mobileNetBlock,input_size=input_size,col_names=["kernel_size", "output_size", "num_params", "mult_adds"])
============================================================================================================================================
Layer (type:depth-idx) Kernel Shape Output Shape Param # Mult-Adds
============================================================================================================================================
Sequential -- [3, 128, 112, 112] -- --
├─Conv2d: 1-1 [3, 3] [3, 64, 112, 112] 576 21,676,032
├─BatchNorm2d: 1-2 -- [3, 64, 112, 112] 128 384
├─ReLU: 1-3 -- [3, 64, 112, 112] -- --
├─Conv2d: 1-4 [1, 1] [3, 128, 112, 112] 8,192 308,281,344
├─BatchNorm2d: 1-5 -- [3, 128, 112, 112] 256 768
├─ReLU: 1-6 -- [3, 128, 112, 112] -- --
============================================================================================================================================
Total params: 9,152
Trainable params: 9,152
Non-trainable params: 0
Total mult-adds (M): 329.96
============================================================================================================================================
Input size (MB): 38.54
Forward/backward pass size (MB): 115.61
Params size (MB): 0.04
Estimated Total Size (MB): 154.18
============================================================================================================================================
The idea here is to first expand the number of channels with a 1x1 convolution, apply the depthwise convolution on this expanded representation, project back down with another 1x1 convolution, and add a residual connection when the input and output shapes match. With this, better performance was obtained with only a slight increase in the number of parameters. For more info please read the paper or watch this tutorial by Prof Maziar Raissi.
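As a rough sketch (again assuming 64 input channels, 128 output channels and, as an assumption, an expansion ratio of 2), the parameter count of such a block works out as follows; it should match the torchinfo summary further down.

c_in, c_out, expand, k = 64, 128, 2, 3
hidden = c_in * expand                      # 128 expanded channels

expand_params = c_in * hidden               # 1x1 expansion:   8,192
dw_params = k * k * hidden                  # 3x3 depthwise:   1,152
project_params = hidden * c_out             # 1x1 projection: 16,384
bn_params = 2 * (hidden + hidden + c_out)   # three BatchNorm2d layers: 768

print(expand_params + dw_params + project_params + bn_params)  # 26,496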
class MobileNetv2Block(nn.Module):
def __init__(self,in_channels,out_channels,expand_ratio,stride=1):
super(MobileNetv2Block,self).__init__()
self.conv1x1Begin = nn.Sequential(
nn.Conv2d(in_channels,in_channels*expand_ratio,kernel_size=1,stride=1,bias=False),
nn.BatchNorm2d(in_channels*expand_ratio),
nn.ReLU6(inplace=True))
self.convDepthWise = nn.Sequential(
nn.Conv2d(in_channels*expand_ratio,in_channels*expand_ratio,kernel_size=3,stride=stride,padding=1,groups=in_channels*expand_ratio,bias=False),
nn.BatchNorm2d(in_channels*expand_ratio),
nn.ReLU6(inplace=True)
)
self.conv1x1Last = nn.Sequential(
nn.Conv2d(in_channels*expand_ratio,out_channels,kernel_size=1,stride=1,bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU6(inplace=True))
self.stride = stride
self.use_res_connect = self.stride == 1 and in_channels == out_channels
def forward(self,x):
input_ = x
x = self.conv1x1Begin(x)
x = self.convDepthWise(x)
x = self.conv1x1Last(x)
if self.use_res_connect:
return x+input_
else:
return x
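The exact instantiation is not shown in the original post, so the call below is an assumption inferred from the summary that follows: expand_ratio=2 gives 128 expanded channels and stride=2 halves the spatial size. One note on the block itself: the MobileNetV2 paper uses a linear bottleneck, i.e. no activation after the final 1x1 projection (the torchvision block shown later does exactly that); keeping the ReLU6 there does not change the parameter count.

# assumed instantiation, inferred from the summary below
mobileNetV2Block = MobileNetv2Block(in_channels=input_filters, out_channels=output_filters,
                                    expand_ratio=2, stride=2)
printInputAndOutput(mobileNetV2Block)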
Input shape = torch.Size([3, 64, 224, 224])
Output shape = torch.Size([3, 128, 112, 112])
summary(mobileNetV2Block,input_size=input_size,col_names=["kernel_size", "output_size", "num_params", "mult_adds"])
============================================================================================================================================
Layer (type:depth-idx) Kernel Shape Output Shape Param # Mult-Adds
============================================================================================================================================
MobileNetv2Block -- [3, 128, 112, 112] -- --
├─Sequential: 1-1 -- [3, 128, 224, 224] -- --
│ └─Conv2d: 2-1 [1, 1] [3, 128, 224, 224] 8,192 1,233,125,376
│ └─BatchNorm2d: 2-2 -- [3, 128, 224, 224] 256 768
│ └─ReLU6: 2-3 -- [3, 128, 224, 224] -- --
├─Sequential: 1-2 -- [3, 128, 112, 112] -- --
│ └─Conv2d: 2-4 [3, 3] [3, 128, 112, 112] 1,152 43,352,064
│ └─BatchNorm2d: 2-5 -- [3, 128, 112, 112] 256 768
│ └─ReLU6: 2-6 -- [3, 128, 112, 112] -- --
├─Sequential: 1-3 -- [3, 128, 112, 112] -- --
│ └─Conv2d: 2-7 [1, 1] [3, 128, 112, 112] 16,384 616,562,688
│ └─BatchNorm2d: 2-8 -- [3, 128, 112, 112] 256 768
│ └─ReLU6: 2-9 -- [3, 128, 112, 112] -- --
============================================================================================================================================
Total params: 26,496
Trainable params: 26,496
Non-trainable params: 0
Total mult-adds (G): 1.89
============================================================================================================================================
Input size (MB): 38.54
Forward/backward pass size (MB): 462.42
Params size (MB): 0.11
Estimated Total Size (MB): 501.06
============================================================================================================================================
Now we can compare the summaries of the three blocks. From the cells above we can see that the input and output shapes stay the same in every case.

Simple convolution block:
Total params: 73,984
Trainable params: 73,984
Non-trainable params: 0
Total mult-adds (G): 2.77

MobileNetV1 block:
Total params: 9,152
Trainable params: 9,152
Non-trainable params: 0
Total mult-adds (M): 329.96

MobileNetV2 block:
Total params: 26,496
Trainable params: 26,496
Non-trainable params: 0
Total mult-adds (G): 1.89
Looking at the torchinfo outputs, you can see that the estimated total size is larger for the MobileNet blocks than for the simple convolution block. This is because roughly twice as many (and for the MobileNetV2 block even more) intermediate activations have to be stored during training, but it is not a problem for inference: at inference time we only need to store the parameters and the architecture. And as the numbers above show, the MobileNet blocks need far fewer parameters, and the total number of multiplications and additions is also much lower, which helps with faster inference. If you want more details, please read the papers, which are well written. If you want to know how torchinfo works, please read this blog by Jacob C. Kimmel.
In fact, all of the above was taken from torchvision, and we can build the same blocks directly with the torchvision classes, as shown below. All credit goes to the amazing torchvision library.
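The exact construction is not shown here either, so the calls below are an assumption based on the summaries that follow: torchvision's InvertedResidual block covers both variants, reducing to the depthwise separable (MobileNetV1-style) block when expand_ratio=1 and matching the inverted residual block above when expand_ratio=2.

from torchvision.models.mobilenetv2 import InvertedResidual

# expand_ratio=1: no expansion 1x1, i.e. the plain depthwise separable block
TorchMobileNetV1Block = InvertedResidual(inp=64, oup=128, stride=2, expand_ratio=1)
# expand_ratio=2: 1x1 expansion -> 3x3 depthwise -> linear 1x1 projection
TorchMobileNetV2Block = InvertedResidual(inp=64, oup=128, stride=2, expand_ratio=2)

printInputAndOutput(TorchMobileNetV1Block)
printInputAndOutput(TorchMobileNetV2Block)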
Input shape = torch.Size([3, 64, 224, 224])
Output shape = torch.Size([3, 128, 112, 112])
Input shape = torch.Size([3, 64, 224, 224])
Output shape = torch.Size([3, 128, 112, 112])
summary(TorchMobileNetV1Block,input_size=input_size,col_names=["kernel_size", "output_size", "num_params", "mult_adds"])
============================================================================================================================================
Layer (type:depth-idx) Kernel Shape Output Shape Param # Mult-Adds
============================================================================================================================================
InvertedResidual -- [3, 128, 112, 112] -- --
├─Sequential: 1-1 -- [3, 128, 112, 112] -- --
│ └─ConvNormActivation: 2-1 -- [3, 64, 112, 112] -- --
│ │ └─Conv2d: 3-1 [3, 3] [3, 64, 112, 112] 576 21,676,032
│ │ └─BatchNorm2d: 3-2 -- [3, 64, 112, 112] 128 384
│ │ └─ReLU6: 3-3 -- [3, 64, 112, 112] -- --
│ └─Conv2d: 2-2 [1, 1] [3, 128, 112, 112] 8,192 308,281,344
│ └─BatchNorm2d: 2-3 -- [3, 128, 112, 112] 256 768
============================================================================================================================================
Total params: 9,152
Trainable params: 9,152
Non-trainable params: 0
Total mult-adds (M): 329.96
============================================================================================================================================
Input size (MB): 38.54
Forward/backward pass size (MB): 115.61
Params size (MB): 0.04
Estimated Total Size (MB): 154.18
============================================================================================================================================
summary(TorchMobileNetV2Block,input_size=input_size,col_names=["kernel_size", "output_size", "num_params", "mult_adds"])
============================================================================================================================================
Layer (type:depth-idx) Kernel Shape Output Shape Param # Mult-Adds
============================================================================================================================================
InvertedResidual -- [3, 128, 112, 112] -- --
├─Sequential: 1-1 -- [3, 128, 112, 112] -- --
│ └─ConvNormActivation: 2-1 -- [3, 128, 224, 224] -- --
│ │ └─Conv2d: 3-1 [1, 1] [3, 128, 224, 224] 8,192 1,233,125,376
│ │ └─BatchNorm2d: 3-2 -- [3, 128, 224, 224] 256 768
│ │ └─ReLU6: 3-3 -- [3, 128, 224, 224] -- --
│ └─ConvNormActivation: 2-2 -- [3, 128, 112, 112] -- --
│ │ └─Conv2d: 3-4 [3, 3] [3, 128, 112, 112] 1,152 43,352,064
│ │ └─BatchNorm2d: 3-5 -- [3, 128, 112, 112] 256 768
│ │ └─ReLU6: 3-6 -- [3, 128, 112, 112] -- --
│ └─Conv2d: 2-3 [1, 1] [3, 128, 112, 112] 16,384 616,562,688
│ └─BatchNorm2d: 2-4 -- [3, 128, 112, 112] 256 768
============================================================================================================================================
Total params: 26,496
Trainable params: 26,496
Non-trainable params: 0
Total mult-adds (G): 1.89
============================================================================================================================================
Input size (MB): 38.54
Forward/backward pass size (MB): 462.42
Params size (MB): 0.11
Estimated Total Size (MB): 501.06
============================================================================================================================================